## master preamble ------------------------------------------------------
rm(list=ls(all=TRUE)) 

## inherit directories from stata -----------
args = commandArgs(trailingOnly = "TRUE")
if (length(args)) {
  dir.proj = paste0(args[1],'/')
  dir.lib  = args[2]
} 

## if loading from library -------------------
if(dir.lib!='') {
  ## load depenencies --------------------
  library(R.utils,lib.loc=dir.lib)
  library(ggplot2,lib.loc=dir.lib)
  library(tidyr,lib.loc=dir.lib)
  library(stringr,lib.loc=dir.lib)
  library(forcats,lib.loc=dir.lib)
  library(lubridate,lib.loc=dir.lib)
  library(haven,lib.loc=dir.lib)
  ## load main packages ------------------
  library(rio,lib.loc=dir.lib)
  library(dplyr,lib.loc=dir.lib)
  library(tidyverse,lib.loc=dir.lib)
  library(fixest,lib.loc=dir.lib)
## otherwise: simple load -----------------
} else {
  library(rio)
  library(dplyr)
  library(tidyverse)
  library(fixest)
}
## --------------------------------------------------------------------


## file preamble ------------------------------------------------------
## set randomization seed --------------
set.seed(413)
## directories --------
dir.data = paste0(dir.proj,'data/')
dir.est  = paste0(dir.proj,'estimates/')
dir.out  = paste0(dir.proj,'output/')
dir.temp = paste0(dir.proj,'_temp/')
## bootstraps -----------
bootrep  = 100
## functions ------------
rudirichlet <- function(n, d) {
  rexp.mat <- matrix( rexp(d * n, 1) , nrow = n, ncol = d)
  dirichlet.weights <- rexp.mat / rowSums(rexp.mat)
  dirichlet.weights }
## --------------------------------------------------------------------



###############################################
### Step 1: Estimate grouped, full sample -----------------

## Read in data ------------
data = import(paste0(dir.temp,'mono_data.dta')) %>% filter(!is.na(Zw))

## Setup for clustered bootstrap -------------------
list    = unique(dplyr::select(data,jid))
weights = rudirichlet(nrow(list),bootrep)*10

## Looping -----------
for(j in 0:bootrep) {
  
  ## Setup bootstrap data --------
  if(j==0) {
    boot.list = list %>% mutate(bootwt = 1)
    boot.data = inner_join(data,boot.list,by='jid') }
  if(j>=1) {
    boot.list = list %>% mutate(bootwt = weights[,j])
    boot.data = inner_join(data,boot.list,by='jid') }
  
  ## Construct weights ------
  wt = boot.data %>% group_by(ocell) %>% summarize(N=sum(bootwt)) %>% mutate(wt=N/sum(N))
  
  ## Estimate regression (standard Z) -----
  fit.1 = feols(Y~0|totfe+W|i(ocell,D)~i(ocell,Z),boot.data,weights=~bootwt)
  
  ## Estimate regression (grouped Z) -----
  fit.2 = feols(Y~0|totfe+W|i(ocell,D)~i(ocell,Zw),boot.data,weights=~bootwt)
  
  ## Aggregate up -----------
  est.1 = data.frame(
    est=sum(wt$wt*fit.1$coefficients),iter=j,type=3)
  est.2 = data.frame(
    est=sum(wt$wt*fit.2$coefficients),iter=j,type=4)
  
  ## Store ---------
  if(j==0) {
    boot.out = bind_rows(est.1,est.2) }
  else {
    boot.out = bind_rows(boot.out,est.1,est.2) }
  
  ## Progress report -----
  print(paste('Iteration',j,'of',bootrep,'Completed'))
  
}

## Compute estimate ------
est = boot.out %>% filter(iter==0) %>% dplyr::select(est,type)
se  = boot.out %>% filter(iter>0) %>% group_by(type) %>% summarize(se=sd(est))
step.1 = inner_join(est,se,by='type') %>% mutate(samp=1) %>% dplyr::select(samp,type,est,se)




###############################################
### Step 2: Estimate grouped, FLL sample -----------------

## Read in data ------------
data = import(paste0(dir.temp,'mono_data.dta')) %>% filter(!is.na(Zw)) %>% filter(fll==1)

## Setup for clustered bootstrap -------------------
list    = unique(dplyr::select(data,jid))
weights = rudirichlet(nrow(list),bootrep)*10

## Looping -----------
for(j in 0:bootrep) {
  
  ## Setup bootstrap data --------
  if(j==0) {
    boot.list = list %>% mutate(bootwt = 1)
    boot.data = inner_join(data,boot.list,by='jid') }
  if(j>=1) {
    boot.list = list %>% mutate(bootwt = weights[,j])
    boot.data = inner_join(data,boot.list,by='jid') }
  
  ## Construct weights ------
  wt = boot.data %>% group_by(ocell) %>% summarize(N=sum(bootwt)) %>% mutate(wt=N/sum(N))
  
  ## Estimate regression (standard Z) -----
  fit.1 = feols(Y~0|totfe+W|i(ocell,D)~i(ocell,Z),boot.data,weights=~bootwt)
  
  ## Estimate regression (grouped Z) -----
  fit.2 = feols(Y~0|totfe+W|i(ocell,D)~i(ocell,Zw),boot.data,weights=~bootwt)
  
  ## Aggregate up -----------
  est.1 = data.frame(
    est=sum(wt$wt*fit.1$coefficients),iter=j,type=3)
  est.2 = data.frame(
    est=sum(wt$wt*fit.2$coefficients),iter=j,type=4)
  
  ## Store ---------
  if(j==0) {
    boot.out = bind_rows(est.1,est.2) }
  else {
    boot.out = bind_rows(boot.out,est.1,est.2) }
  
  ## Progress report -----
  print(paste('Iteration',j,'of',bootrep,'Completed'))
  
}

## Compute estimate ------
est = boot.out %>% filter(iter==0) %>% dplyr::select(est,type)
se  = boot.out %>% filter(iter>0) %>% group_by(type) %>% summarize(se=sd(est))
step.2 = inner_join(est,se,by='type') %>% mutate(samp=2) %>% dplyr::select(samp,type,est,se)



###############################################
### Step 3: Other ests (no bootstrap) -----------------

data = import(paste0(dir.temp,'mono_data.dta'))

## Full Sample ------------
fit = feols(Y~0|totfe|D~Z,data,vcov=~jid)
est.1 = bind_rows(
  step.1,
  data.frame(
    samp=1,type=1,est=as.numeric(fit$coefficients),se=as.numeric(fit$se)))

fit = feols(Y~0|totfe+W|D~Zw,data,vcov=~jid)
est.1 = bind_rows(
  est.1,
  data.frame(
    samp=1,type=2,est=as.numeric(fit$coefficients),se=as.numeric(fit$se))) %>%
  arrange(type)


## FLL Sample ------------
data = filter(data,fll==1)
fit = feols(Y~0|totfe|D~Z,data,vcov=~jid)
est.2 = bind_rows(
  step.2,
  data.frame(
    samp=2,type=1,est=as.numeric(fit$coefficients),se=as.numeric(fit$se)))

fit = feols(Y~0|totfe+W|D~Zw,data,vcov=~jid)
est.2 = bind_rows(
  est.2,
  data.frame(
    samp=2,type=1,est=as.numeric(fit$coefficients),se=as.numeric(fit$se))) %>% 
  arrange(type)


###############################################
### Step 4: write output to LaTeX table -----------------

## Initialize connection -------
tabfile = paste0(dir.out,'apx_deter/table_grouped.tex')
tab = file(tabfile)

## Write table -------
writeLines(c("& (1) & (2) & (3) & (4) \\\\",
             "& Baseline & Driver Cells & Officer Cells & Both \\\\",
             "\\hline \\\\ [-1.8ex]"),tab)

b.wr = paste0(round(est.1$est,4))
s.wr = paste0('(',round(est.1$se,4),')')
write(paste0('Full Sample&',paste(b.wr,collapse='&'),'\\\\'),tabfile,append=T)
write(paste0('&',paste(s.wr,collapse='&'),'\\\\'),tabfile,append=T)

b.wr = paste0(round(est.2$est,4))
s.wr = paste0('(',round(est.2$se,4),')')
write(paste0('[1.5ex] FLL Officers&',paste(b.wr,collapse='&'),'\\\\'),tabfile,append=T)
write(paste0('&',paste(s.wr,collapse='&'),'\\\\'),tabfile,append=T)

## Close connection -----
close(tab)



## delete temporary data file ----------------
file.remove(paste0(dir.temp,'mono_data.dta'))





